## [1] "Output created on 2018-09-10"
## [1] "Dataset used: /kriegsteinlab/data1/aparna/homefiles/gw14.RData"
## [1] "Metadata file used: /kriegsteinlab/data1/carmen/2nd-trimester/metadata/colnames_GW14_080818_fixed.txt"
## [1] "Sample Age (from user input): GW14"
.Rmd script.To run this R Markdown file, enter the following chunks of code in an R session, with the appropriate paths and file names.
Call the knitr::render function indicating the path to this .Rmd file:
.RData workspace file), and the name of the Seurat object contained within the loaded workspace.age <- "GW14"
render("/kriegsteinlab/data1/path/to/file/QC_10X_2018-09-08.Rmd",
output_file=paste0("QC_10X_", age, Sys.Date(), ".html"),
params = list(dataset="/kriegsteinlab/data1/aparna/homefiles/gw00.RData",
metadata.file="/kriegsteinlab/data1/carmen/2nd-trimester/metadata/colnames_GW00.txt",
age = age
seurat.obj = "gw00",
set_title=paste0(age, " QC Plots"),
nGene.limit = 5000,
nUMI.limit = 10000,
x.textsize = 12)
)age <- params$age
dataset <- params$dataset
metadata.file <- params$metadata.file
seurat.obj <- params$seurat.obj
nGene.limit <- params$nGene.limit
nUMI.limit <- params$nUMI.limit
x.textsize <- params$x.textsizemultiplot function.library(ggplot2)
library(scales)
library(viridis)
library(tidyr)
library(dplyr)
library(janitor)
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}We load the data and metadata before running the .Rmd file, as this takes a long time. Otherwise, it will be loaded everytime the script is run (loading is not cached.)
We create a new Seurat object using the raw data in the original Seurat object. For compatibility issues with the new version of Seurat.
if(!exists("seurat.obj.new") & !exists(get("seurat.obj"))) {
load(dataset)
assign("seurat.obj.new", CreateSeuratObject(raw.data = get(seurat.obj)@raw.data,
min.cells = 0, min.genes = 0, project=meta$sample.age))
rm(list=seurat.obj)
}
if(!exists("seurat.obj.new") & exists(get("seurat.obj"))) {
assign("seurat.obj.new", CreateSeuratObject(raw.data = get(seurat.obj)@raw.data,
min.cells = 0, min.genes = 0, project=meta$sample.age))
rm(list=seurat.obj)
}
if(!exists("meta")){
meta <- read_tsv(metadata.file, col_names = c("cell.name", "brain.region", "sample.age"))
# OPTIONAL: Notify when loading is finished.
}
RPushbullet::pbPost("note",
title = paste0("Finished loading"), body = paste0("Dataset ", age),
debug = F, devices = c("Pixel", "Chrome"))In both the @ident and the @meta.data slots of the Seurat object.
if( ! length(seurat.obj.new@ident) > 0) {
seurat.obj.new@meta.data$orig.ident <- factor(meta$brain.region)
seurat.obj.new <- SetIdent(seurat.obj.new, cells.use = WhichCells(seurat.obj.new),
ident.use = meta$brain.region)
}
# Make table of cell counts per region.
seurat.obj.new@meta.data %>% tabyl(orig.ident) %>%
adorn_totals(where="row") %>% arrange(desc(n)) if(sum(seurat.obj.new@meta.data$percent.ribosomal)==0 & sum(seurat.obj.new@meta.data$percent.mitochondrial)==0) {
ribogenes <- read_table("/kriegsteinlab/data1/carmen/2nd-trimester/qc/ribogenes.txt",
col_names = F)$X1
mito.genes <- grep(pattern="^MT-", x=rownames(seurat.obj.new@data), value = TRUE)
percent.mito <- colSums(seurat.obj.new@raw.data[mito.genes, ])/colSums(seurat.obj.new@raw.data)
seurat.obj.new <- AddMetaData(object = seurat.obj.new,
metadata = percent.mito, col.name = "percent.mitochondrial")
ribo <- seurat.obj.new@raw.data[ribogenes, ]
percent.ribo <- colSums(ribo)/colSums(seurat.obj.new@raw.data)
seurat.obj.new <- AddMetaData(object = seurat.obj.new,
metadata = percent.ribo, col.name = "percent.ribosomal")
}statsByArea <- seurat.obj.new@meta.data %>% dplyr::group_by(orig.ident) %>%
summarise(mean.nGene = mean(nGene), mean.nUMI = mean(nUMI),
mean.percentmito = mean(percent.mitochondrial),
mean.percentribo = mean(percent.ribosomal))
statsByAreaSome useful metadata stats:
## nGene nUMI percent.mitochondrial percent.ribosomal
## Min. : 41 Min. : 191 Min. :0.000 Min. :0.0081
## 1st Qu.: 369 1st Qu.: 580 1st Qu.:0.013 1st Qu.:0.1903
## Median : 507 Median : 855 Median :0.018 Median :0.2319
## Mean : 580 Mean : 1056 Mean :0.019 Mean :0.2364
## 3rd Qu.: 684 3rd Qu.: 1253 3rd Qu.:0.024 3rd Qu.:0.2772
## Max. :8426 Max. :60900 Max. :0.146 Max. :0.7857
(Not working currently.)
violin.discrete and violinplot.colorby.roundUpNice <- function(x, nice=c(1,2,4,5,6,8,10)) {
if(length(x) != 1) stop("'x' must be of length 1")
10^floor(log10(x)) * nice[[which(x <= 10^floor(log10(x)) * nice)[[1]]]]
}
violin.discrete <- function(y, ylimit, title, xlabelsize){
ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, get(y), colour=orig.ident), alpha=0.2, size=0.25) +
geom_violin(aes(orig.ident, get(y)), colour="grey60", scale = "width", alpha=0.3) +
scale_color_manual(values = c("#E58606", "#4885ed", "#52BCA3", "#99C945", "#ffc300",
"#db3236", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1",
"#52BCA3", "#99C945")) +
theme_minimal() +
theme(plot.title = element_text(size= 16, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(size=xlabelsize, angle = 30)
) +
scale_y_continuous(breaks=seq(0, roundUpNice(ylimit), by=roundUpNice(ylimit)/10),
limits=c(0, ylimit)) +
scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
labs(title=paste0(title, "\n", age),
x="",
y=y
) +
guides(fill=FALSE, colour=FALSE)
}violinplot.colorby <- function(y, ylimit, colorby, title, xlabelsize){
if (colorby == quo(nUMI)) {
colorby.aes <- quo(log2(!! colorby))
} else {
colorby.aes <- colorby
}
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(!! colorby)
ggplot(seurat.obj.new@meta.data %>% filter(get(y) < ylimit)) +
# %>% filter(!! y < ylimit)
geom_jitter(aes_string("orig.ident", y, colour=colorby.aes), alpha=0.6, size=0.25) +
scale_color_viridis(option="magma", begin = .1, end = .95) +
geom_violin(aes_string("orig.ident", y),
colour="grey60", colour="grey60", alpha=0.1, scale="width") +
theme_minimal() +
theme(plot.title = element_text(size= 13, hjust = 0.2),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
legend.position=c(0.75, 1),
legend.key.size = unit(0.5,"cm"),
legend.direction = "horizontal",
legend.text = element_text(angle = 30, size = 8),
legend.title = element_text(size = 9),
axis.text.x = element_text(size=x.textsize, angle = 30)
# legend.title.align = 0.9
) +
scale_y_continuous(breaks=seq(0, roundUpNice(ylimit), by=roundUpNice(ylimit)/10),
limits=c(0, ylimit)) +
labs(title=paste0(title, "\n", age),
x="",
y=y,
color = colorby.aes)
}seurat.obj.new@meta.data$orig.ident <- ordered(seurat.obj.new@meta.data$orig.ident,
levels = (statsByArea %>%
arrange(mean.nGene))$orig.ident)
y <- "nGene"
ylimit <- max(seurat.obj.new@meta.data$nGene)
title="1a. Genes per Cell"
xlabelsize=16
p1a <- violin.discrete(y, ylimit, title, xlabelsize)
p1ay <- "nGene"
ylimit <- nGene.limit
title="1b. Genes per Cell (Zoom-in)"
xlabelsize=x.textsize
p1b <- violin.discrete(y, ylimit, title, xlabelsize)
p1by <- "nUMI"
ylimit <- max(seurat.obj.new@meta.data$nUMI)
title="3a. UMIs per Cell"
xlabelsize=x.textsize
p3a <- violin.discrete(y, ylimit, title, xlabelsize)
p3ay <- "nUMI"
ylimit <- nUMI.limit
title="3b. UMIs per Cell (Zoom-in)"
xlabelsize=x.textsize
p3b <- violin.discrete(y, ylimit, title, xlabelsize)
p3by <- "nGene"
colorby <- quo(nUMI)
title <- "2. Genes vs UMIs per Cell (Zoom-in)"
ylimit <- nGene.limit
xlabelsize=16
p2 <- violinplot.colorby(y, ylimit, colorby, title, xlabelsize)
p2y <- "nUMI"
colorby <- quo(nGene)
title <- "4. UMIs vs Genes per Cell (Zoom-in)"
ylimit <- nUMI.limit
xlabelsize=x.textsize
p4 <- violinplot.colorby(y, ylimit, colorby, title, xlabelsize)
p4y <- "percent.ribosomal"
title <- "5. % Ribosomal Genes per Cell"
ylimit <- max(seurat.obj.new@meta.data$percent.ribosomal)
xlabelsize=x.textsize
p5 <- violin.discrete(y, ylimit, title, xlabelsize)
p5y <- "percent.mitochondrial"
title <- "6. % Mitochondrial Genes per Cell"
ylimit <- max(seurat.obj.new@meta.data$percent.mitochondrial)
xlabelsize=x.textsize
p6 <- violin.discrete(y, ylimit, title, xlabelsize)
p6y <- "nGene"
colorby <- quo(percent.ribosomal)
title <- "9. Total vs % Ribosomal Genes per Cell (Zoom-in)"
ylimit <- nGene.limit
xlabelsize=x.textsize
p7b <- violinplot.colorby(y, ylimit, colorby, title, xlabelsize)
p7by <- "nGene"
colorby <- quo(percent.mitochondrial)
title <- "10. Total vs % Mitochondrial Genes per Cell (Zoom-in)"
ylimit <- nGene.limit
xlabelsize=x.textsize
p8b <- violinplot.colorby(y, ylimit, colorby, title, xlabelsize)
p8by <- "percent.ribosomal"
colorby <- quo(percent.mitochondrial)
title <- "7. % Ribosomal vs % Mitochondrial Genes per Cell"
ylimit <- max(seurat.obj.new@meta.data$percent.ribosomal)
xlabelsize=x.textsize
p9 <- violinplot.colorby(y, ylimit, colorby, title, xlabelsize)
p9y <- "percent.mitochondrial"
colorby <- quo(percent.ribosomal)
title <- "8. % Mitochondrial vs % Ribosomal Genes per Cell"
ylimit <- max(seurat.obj.new@meta.data$percent.mitochondrial)
xlabelsize=x.textsize
p10 <- violinplot.colorby(y, ylimit, colorby, title, xlabelsize)
p10ggplot.fun and print.plot functions.ggplot.fun <- function(x, y, colorby, plottitle) {
scatterplot <- ggplot(seurat.obj.new@meta.data) +
geom_point(aes_string(x, y, colour=colorby), alpha=0.6, size=0.6) +
scale_color_viridis(option="magma", begin = .1, end = .95) +
theme_minimal() +
theme(plot.title = element_text(size= 20, hjust = 0.5),
axis.title.y=element_text(vjust = 2, size=14),
axis.title.x=element_text(vjust = -1, size=14),
legend.position="bottom",
legend.key.size = unit(0.5,"cm"),
legend.direction = "horizontal",
legend.text = element_text(angle = 30, size = 7),
legend.title = element_text(size = 10),
strip.text = element_text(size=16)
# legend.title.align = 0.9
) +
# scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10),
# limits=c(0, nGene.limit)) +
labs(title=paste0(age, ": ", plottitle),
x=x,
y=y,
color= colorby) +
facet_wrap( ~ orig.ident, ncol=3)
}print.plot <- function(x, y, colorby, arrangeby, plottitle) {
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(!! arrangeby)
scatterplot <- ggplot.fun(x, y, colorby, plottitle)
png(paste0(age, "_scatter_", x, "_vs_", y, ".png"),
width=11, height=8, units="in", res=300)
print(scatterplot)
dev.off()
scatterplot
}x <- "nGene"
y <- "log2(nUMI)"
colorby <- "log2(nUMI)"
arrangeby <- quo(nUMI)
title <- "Genes vs UMIs per Cell"
print.plot(x, y, colorby, arrangeby, title)x <- "nGene"
y <- "percent.mitochondrial"
colorby <- "log2(nUMI)"
arrangeby <- quo(nUMI)
title <- "Total vs %Mitochondrial\n Genes per Cell"
print.plot(x, y, colorby, arrangeby, title)x <- "nGene"
y <- "percent.ribosomal"
colorby <- "log2(nUMI)"
arrangeby <- quo(nUMI)
title <- "Total vs % Ribosomal\n Genes per Cell"
print.plot(x, y, colorby, arrangeby, title)x <- "percent.ribosomal"
y <- "percent.mitochondrial"
colorby <- "nGene"
arrangeby <- quo(nGene)
title <- "% Ribosomal vs % Mitochondrial\n Genes per Cell".Rds fileFor lighter loading. Add sample age column to @metadata.
seurat.obj.path <- paste0("/kriegsteinlab/data1/carmen/2nd-trimester/data/seuratobjects/",
age, "_newSeuratObj.rds")
if(!file.exists(seurat.obj.path)){
seurat.obj.new@meta.data %<>%
mutate(cell.name=rownames(.), sample.age=seurat.obj.new@project.name) %>%
select(cell.name, everything())
meta.from.seurat <- seurat.obj.new@meta.data
saveRDS(seurat.obj.new, seurat.obj.path)
}@metadata slot to a text fileFor analyzing QC stats later without having to load the entire dataset. This will come in handy when comparing QC stats across samples.